package com.novel.crawler.utils;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.ArrayList;
import java.util.List;

/**
 * Static helpers that extract links and text from an HTML string using
 * jsoup CSS selectors.
 */
public class JsoupUtil {

    /**
     * Collects the {@code abs:href} attribute value of every element that
     * matches the given selector.
     *
     * @param html   the HTML markup to parse
     * @param format the jsoup CSS selector identifying the elements to read
     * @param url    the base URI handed to the parser, against which relative
     *               links are resolved
     * @return one entry per matched element, in match order; per jsoup's
     *         {@code abs:} attribute handling an entry is an empty string
     *         when the element has no resolvable {@code href}
     */
    public static List<String> ParseHref(String html, String format, String url) {
        final Elements matches = Jsoup.parse(html, url).select(format);
        final List<String> hrefs = new ArrayList<String>(matches.size());
        for (int i = 0; i < matches.size(); i++) {
            final Element match = matches.get(i);
            // The "abs:" prefix asks jsoup for the link resolved against the base URI.
            hrefs.add(match.attr("abs:href"));
        }
        return hrefs;
    }

    /**
     * Returns the text content of the elements that match the given selector.
     *
     * @param html   the HTML markup to parse
     * @param format the jsoup CSS selector identifying the elements to read
     * @return the text reported by jsoup for the whole matched set
     */
    public static String ParseText(String html, String format) {
        final Elements matches = Jsoup.parse(html).select(format);
        return matches.text();
    }

}
